# Read data and cleanup
import pandas as pd
from os.path import basename
from datetime import datetime, timedelta
import numpy as np
import locale
import re
import glob
## Generics and config
csv_files_path = "./csv/"
pd.options.plotting.backend = "plotly"
# English month names are required to parse the "DD Month, YYYY" game dates below.
# NOTE(review): "en_US" works on Windows; Linux typically needs "en_US.UTF-8" — confirm target platform.
locale.setlocale(locale.LC_TIME, "en_US")
# Matches "DD Month, YYYY"; substituting with group 2 drops the day-of-month so
# only "Month, YYYY" remains.  Raw string fixes the invalid escape sequences
# (\d, \w) that raise SyntaxWarning on Python 3.12+.
strip_day_re = re.compile(r'(\d{1,2}) (\w+, \d{4})')
# data buckets
def _load_bucket(suffix):
    """Concatenate every CSV under ``csv_files_path`` whose name ends in *suffix*.

    Each row is tagged with an ``origin`` column derived from the file name
    (base name with *suffix* stripped) so rows can be traced back to their
    source run.  Replaces five copy-pasted one-liners with a single helper.
    """
    frames = [
        pd.read_csv(path, sep=";").assign(origin=basename(path).removesuffix(suffix))
        for path in glob.glob(csv_files_path + "*" + suffix)
    ]
    return pd.concat(frames)

time_data = _load_bucket("_times.csv")
event_data = _load_bucket("_events_new.csv")
resources_data = _load_bucket("_resources.csv")
tension_data = _load_bucket("_tension.csv")
economy_data = _load_bucket("_economy.csv")
# date cleanup
# The game logs midnight as "24:00", which datetime parsing rejects; normalise
# it to "00:00" first.  strip_day_re then keeps only "Month, YYYY" (group 2),
# collapsing every sample onto the first of its month.  Series.replace with a
# compiled regex performs a regex substitution.
time_data['Date'] = pd.to_datetime(time_data['Game time'].str.replace("24:00", "00:00").replace(strip_day_re, '\\2'))
event_data['Date'] = pd.to_datetime(event_data['Date'].str.replace("24:00", "00:00"))
resources_data['Date'] = pd.to_datetime(resources_data['Date'].str.replace("24:00", "00:00").replace(strip_day_re, '\\2'))
tension_data['Date'] = pd.to_datetime(tension_data['Date'].str.replace("24:00", "00:00").replace(strip_day_re, '\\2'))
economy_data['Date'] = pd.to_datetime(economy_data['Date'].str.replace("24:00", "00:00").replace(strip_day_re, '\\2'))
# time cleanup
# Convert "HH:MM:SS" wall-clock strings to total seconds.  pd.to_timedelta is
# vectorised and replaces the previous per-row strptime apply; the result is
# identical (seconds since 00:00:00).
time_data['Realtime'] = pd.to_timedelta(time_data['Realtime']).dt.total_seconds()
# Time Data Preprocessing
# Per (month, origin): keep only months with at least 28 samples (roughly one
# per game day), then take each column's spread (max - min) — for 'Realtime'
# this is the real-world seconds spent on that in-game month.
_month_origin = [pd.Grouper(freq='MS'), 'origin']
_indexed = time_data.drop(['Game time'], axis='columns').set_index("Date")
_complete_months = _indexed.groupby(_month_origin).filter(lambda g: len(g) >= 28)
time_data_prep = (
    _complete_months.groupby(_month_origin)
    # Equivalent to np.ptp; numpy callables in agg are deprecated in pandas 2.x.
    .agg(lambda s: s.max() - s.min())
    .reset_index()
)
# Cross-origin monthly summary: mean and sample stdev of seconds per month.
summary_time_data = time_data_prep.drop(['origin'], axis='columns').groupby("Date").agg(
    Average=("Realtime", "mean"), Stdev=("Realtime", "std")
)
data_origin_count = len(time_data["origin"].unique())
# Event Data Preprocessing
# Maps raw event labels (as they appear in the *_events_new.csv logs) to a
# short category tag used to group rows on the event-distribution plot's
# multicategory axis.  Any event missing from this table is bucketed as
# "UNCATEGORIZED" downstream (see the .fillna after the .map call).
# Tags appear to be country/conflict abbreviations taken from the labels
# themselves (e.g. 'ACW', '2WK', 'OTT') — domain meanings inferred, confirm.
event_categories = {
'RUS DICTATOR WRANGEL': 'RUS',
'BUL PATH CONSTITUTIONALISM': 'BUL',
'BUL PATH ZVENO': 'BUL',
'RUS FALLS': 'RUS',
'SOV WINS RCW': 'RUS',
'ANQ WINS LEP WAR': 'LEP',
'ICW STARTS': 'INC',
'INDOCHINA WINS ICW': 'INC',
'NFA CSN': 'NFA',
'NFA KINGDOM': 'NFA',
'BUL PATH DICTATORSHIP': 'BUL',
'LEP WINS LEP WAR': 'LEP',
'NFA PETAIN': 'NFA',
'SAUDIS UNIFY ARABIA': 'SAU',
'SIK WINS XINJIANG WAR': 'ETS',
'ACW START': 'ACW',
'AUS JOINS ACW': 'ACW',
'CSA JOINS ACW': 'ACW',
'MAC GOES EAST': 'ACW',
'NEE SPAWNED': 'ACW',
'PACT WINS 4BW': '4BW',
'PSA JOINS ACW': 'ACW',
'CHI WINS LEP WAR': 'LEP',
'CHL-ARG WAR': 'ARG',
'JABAL SHAMMAR UNIFIES ARABIA': 'SAU',
'OTT FEDERALIST': 'OTT',
'RUS TSAR DMITRIY': 'RUS',
'RUS TSAR KIRILL': 'RUS',
'HOL GOES SOCIALIST': 'HOL',
'SWF WINS SCW': 'SCW',
'ASSYRIA REVOLTS AGAINST OTT': 'OTT',
'ETS WINS XINJIANG WAR': 'ETS',
'CANADA INTERVENES IN ACW KILL CSA': 'ACW',
'CANADA INTERVENES IN ACW SUPPORT PSA': 'ACW',
'FOP KILLS ARG': 'ARG',
'RUS TSAR WRANGEL': 'RUS',
'ANATOLIAN KURDISTAN REVOLTS AGAINST OTT': 'OTT',
'BULGARIA WINS 4BW': '4BW',
'ARMENIA REVOLTS AGAINST OTT': 'OTT',
'CANADA INTERVENES IN ACW SUPPORT USA': 'ACW',
'MA LOSES NORTHWESTERN WAR': 'ETS',
'MA SURVIVES NORTHWESTERN WAR': 'ETS',
'3RD BOER WAR': '3BW',
'ARG KILLS FOP': 'ARG',
'CYPRUS REVOLTS AGAINST OTT': 'OTT',
'CYRENAICA REVOLTS AGAINST OTT': 'OTT',
'DEMOCRATIC CEN': 'CEN',
'HEJAZ REVOLTS AGAINST OTT': 'OTT',
'IRAQ REVOLTS AGAINST OTT': 'OTT',
'IRAQI KURDISTAN REVOLTS AGAINST OTT': 'OTT',
'SPA WINS SCW': 'SCW',
'TRIPOLITANIA REVOLTS AGAINST OTT': 'OTT',
'3RD BOER WAR NAT WINS': '3BW',
'AZR JOINED OTT': 'OTT',
'NEE WINS ACW': 'ACW',
'PSA WINS ACW': 'ACW',
'SRI UNIFIES ITALY': 'ITA',
'THE SECOND WELTKRIEG': '2WK',
'USA (MAC) WINS ACW': 'ACW',
'TRM DECLARES ON RUS WITH JAP BACKUP': 'TRM',
'AUSTRIA IN 2WK': 'AUS',
'ENTENTE ENTERS 2WK': '2WK',
'PACT IN 2WK': '2WK',
'3RD BOER WAR WP': '3BW',
'DOMINION UNIFIES INDIA': 'IND',
'JAP SUPPORTS FNG AGAINST QIE': 'JAP',
'BHARATIYA UNIFIES INDIA': 'IND',
'GER FALLS': '2WK',
'JAP WAR ON FNG': 'JAP',
'RUS LOSING WKII - MOSCOW': '2WK',
'ANI UNIFIES ITALY': 'ITA',
'AUS FALLS': 'AUS',
'FRANCE LANDS IN FRANCE': '2WK',
'JAP ICHI-GOU': 'JAP',
'NFA FALLS - BY ENG': '2WK',
'CANADA LANDS IN BRITAIN': '2WK',
'OTT WINS IN THE LEVANT': 'OTT',
'PRINCELY UNIFIES INDIA': 'IND',
'CSA WINS ACW': 'ACW',
'YUGOSLAVIA FORMED': 'YUG',
'NFA FALLS - BY FRA': '2WK',
'RUS INTERVENE AGAINST OTT': 'RUS',
'CAN FALLS - BY FRA': '2WK',
'ENG FALLS': '2WK',
'ITA UNIFIES ITALY': 'ITA',
'SIC UNIFIES ITALY': 'ITA',
'SPR WINS SCW': 'SCW',
'TEX WINS ACW': 'ACW',
'USA (DEM) WINS ACW': 'ACW',
'CAN FALLS - BY ENG': '2WK',
'FRA FALLS': '2WK',
'JAP DECLARES ON RUS': 'JAP',
'JAPAN FADING SUN': 'JAP',
'QING EMPIRE UNITES CHINA': 'CHI',
'NFA DEMOCRACY': 'NFA',
'NATPOP SQI WINS LEP WAR': 'LEP',
'MAC GOES WEST': 'ACW',
'RUS REPUBLIC': 'RUS',
'RUS REPUBLIC DMITRIY': 'RUS',
'RUS VOZHD SAVINKOV': 'RUS',
'KUMUL WINS XINJIANG WAR': 'ETS',
'JAP WAR PROGRESS - COULD NOT PUSH': 'JAP',
'RUS DECLARES ON JAP': 'RUS',
'JAP WAR PROGRESS - CONQUERED COASTAL': 'JAP',
'RUS DECLARES ON TRM': 'JAP',
'SRD UNIFIES ITALY': 'ITA',
'BRA WINS AGAINST ARG': 'ARG',
'ARG WINS AGAINST BRA': 'ARG',
'CAN FALLS - BY CSA': 'ACW',
'SHX FALLS': "CHI",
'SQI WINS LEP WAR': "LEP",
'FNG-QIE WAR START': "CHI",
'SZC FALLS': "CHI",
'GEA WINS ICW': "CHI",
'QIE FALLS': "CHI",
'CANADA INTERVENES IN ACW SUPPORT TEX' : "ACW",
'YUN FALLS': "CHI",
'FNG FALLS': "CHI",
'NFA FALLS - BY SWF': "2WK",
'JAP WAR PROGRESS - CONQUERED INTERIOR': "JAP",
'JAP WAR PROGRESS - CONQUERED ALL': "JAP",
}
# Per (quarter, event): number of occurrences across all runs.
event_data_prep = event_data.drop(['ROOT', 'FROM', 'origin'], axis='columns').set_index("Date").groupby([pd.Grouper(freq='QS'), 'Event']).agg(Count=('Event', np.count_nonzero)).reset_index()
# Short category tag per event; unknown events fall back to "UNCATEGORIZED".
event_data_prep['Category'] = event_data_prep['Event'].map(event_categories).fillna("UNCATEGORIZED")
# All-time total per event, used to normalise the quarterly counts.
event_data_prep_totals = event_data.groupby(['Event']).agg(Count=('Event', np.count_nonzero))['Count'].to_dict()
# PPC = share of this event's lifetime occurrences falling in the quarter.
# Vectorised map + division replaces the previous per-row apply (same values).
event_data_prep['PPC'] = event_data_prep['Count'] / event_data_prep['Event'].map(event_data_prep_totals)
# Tension data preprocessing
# Monthly mean tension per run.
tension_data_prep = tension_data.set_index("Date").groupby([pd.Grouper(freq='MS'), 'origin']).mean().reset_index()
# Cross-run monthly summary statistics (string aggregations replace the
# numpy callables deprecated in pandas 2.x agg).
summary_tension_data = tension_data_prep.set_index("Date").groupby([pd.Grouper(freq='MS')]).agg(
    Average=("Tension", "mean"), Min=("Tension", "min"), Max=("Tension", "max")
)
summary_tension_histogram = tension_data_prep[["Date", "Tension"]]
# First "THE SECOND WELTKRIEG" event per (month, run), used to annotate the
# per-run tension traces.  Boolean mask with str.contains replaces the old
# where(...).dropna(), which also discarded matching rows that merely had a
# NaN in an unrelated column (e.g. ROOT/FROM); na=False keeps NaN events out.
_is_2wk = event_data["Event"].str.contains("THE SECOND WELTKRIEG", regex=False, na=False)
events_for_tension = event_data[_is_2wk]\
    .set_index("Date").groupby([pd.Grouper(freq='MS'), 'origin']).first().reset_index()[["Date", "origin", "Event"]]
tension_data_prep = tension_data_prep.merge(events_for_tension, on=["Date", "origin"], how="left")
# Plot performance data
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import random

# Single-panel figure: a shaded mean +/- 1 and 2 stdev band for seconds spent
# per in-game month, plus one hidden line per data source (toggle via legend).
summary_time_data_fig = make_subplots(
    rows=1,
    cols=1,
    shared_xaxes=True,
    shared_yaxes=True,
    vertical_spacing=0.02,
    y_title="Seconds per Month",
    subplot_titles=["Summary"],
)
summary_time_data_fig.update_layout(
    title_text="Time per Period - Higher is Worse",
    autosize=False,
    width=1600,
    height=600,
    showlegend=True,
    hovermode='x unified',
    legend=dict(groupclick="toggleitem"),
)
summary_time_data_fig.update_xaxes(tickangle=45)

_months = summary_time_data.index
_avg = summary_time_data["Average"]
_sd = summary_time_data["Stdev"]
# Traces are ordered top band -> average -> bottom band so each "tonexty"
# fill shades the region down to the previous trace.
_band_traces = [
    go.Scatter(x=_months, y=(_avg + 2 * _sd).clip(lower=0), line_color='rgba(0, 100, 80, 0.05)', name="+2 stdevs", orientation="v", fill="none", fillcolor="rgba(150, 150, 150, 0.05)", legendgroup="summary"),
    go.Scatter(x=_months, y=(_avg + 1 * _sd).clip(lower=0), line_color='rgba(0, 100, 80, 0.1)', name="+1 stdevs", orientation="v", fill="tonexty", fillcolor="rgba(150, 150, 150, 0.05)", legendgroup="summary"),
    go.Scatter(x=_months, y=_avg, name="s/month", orientation="v", fill="tonexty", line_color='rgba(0, 100, 80, 1)', fillcolor="rgba(100, 150, 100, 0.1)", legendgroup="summary", legendgrouptitle_text="Summary"),
    go.Scatter(x=_months, y=(_avg - 1 * _sd).clip(lower=0), line_color='rgba(0, 100, 80, 0.1)', name="-1 stdevs", orientation="v", fill="tonexty", fillcolor="rgba(100, 150, 100, 0.1)", legendgroup="summary"),
    go.Scatter(x=_months, y=(_avg - 2 * _sd).clip(lower=0), line_color='rgba(0, 100, 80, 0.05)', name="-2 stdevs", orientation="v", fill="tonexty", fillcolor="rgba(150, 150, 150, 0.05)", legendgroup="summary"),
]
summary_time_data_fig.add_traces(_band_traces, rows=1, cols=1)

# Per-source traces start hidden ('legendonly'); colour varies via random red channel.
for origin, origin_frame in time_data_prep.groupby('origin'):
    _trace = go.Scatter(
        x=origin_frame['Date'],
        y=origin_frame['Realtime'],
        name=origin,
        line_color='rgba({}, 100, 100, 1)'.format(random.randint(50, 150)),
        legendgroup="details",
        legendgrouptitle_text="Data Source",
        visible='legendonly',
    )
    summary_time_data_fig.add_trace(_trace, row=1, col=1)
summary_time_data_fig.show("notebook")
# Plot tension data
# Single-panel figure: cross-run monthly tension summary (Average/Min/Max
# lines), a thin reference line at tension 0.75, a 2-D density histogram of
# the monthly per-run samples, and one hidden line per run with 2WK event
# text annotations (from the earlier merge with events_for_tension).
summary_tension_data_fig = make_subplots(
    rows=1,
    cols=1,
    shared_xaxes=True,
    shared_yaxes=True,
    vertical_spacing=0.01,
    y_title="Tension",
    subplot_titles=["Summary"]
).update_layout(
    title_text="Tension over Time",
    autosize=False,
    width=1600,
    height=600,
    showlegend=True,
    hovermode='x unified',
    legend=dict(groupclick="toggleitem"),
).update_xaxes(tickangle=45).add_traces([
    # Cross-run monthly average tension.
    go.Scatter(
        x=summary_tension_data.index,
        y=summary_tension_data.Average,
        line_color='rgb(100,50,50)',
        name='Average',
        legendgroup="summary",
        legendgrouptitle_text="Summary",
    ),
    # Monthly minimum across runs.
    go.Scatter(
        x=summary_tension_data.index,
        y=summary_tension_data.Min,
        fillcolor='rgba(100,50,50,0.5)',
        line_color='rgba(100,50,50,0.5)',
        name='Min',
        legendgroup="summary",
    ),
    # Monthly maximum across runs.
    go.Scatter(
        x=summary_tension_data.index,
        y=summary_tension_data.Max,
        fillcolor='rgba(100,50,50,0.5)',
        line_color='rgba(100,50,50,0.5)',
        name='Max',
        legendgroup="summary",
    ),
    # Horizontal reference line at 0.75 spanning the full date range.
    # NOTE(review): presumably an in-game world-tension threshold — confirm.
    go.Scatter(
        x=[summary_tension_data.index.min(), summary_tension_data.index.max()],
        y=[0.75, 0.75],
        line_color='rgba(255,50,50,255)',
        line_width=0.5,
        marker_size=0.2,
        name="WT Threshold",
        showlegend=False,
        legendgroup="summary",
    ),
    # Density of (month, tension) samples; colourscale is transparent at zero
    # so the summary lines stay visible where there is no data.
    go.Histogram2d(
        x=summary_tension_histogram["Date"],
        y=summary_tension_histogram["Tension"],
        nbinsy=len(list(np.arange(0.0, 1.0, 0.05))),  # 20 bins of width 0.05
        nbinsx=len(summary_tension_data.index),  # one bin per summarised month
        colorscale = [[0, 'rgba(0,0,0,0)'], [0.2, 'rgba(200,10,10,0.2)'], [0.75, 'rgba(240,0,0,0.8)'], [0.9, 'rgba(255,0,0,1)'], [1, 'rgba(255,0,0,1)']],
        name='Data Density',
        showlegend=False,
        showscale=False,
        histnorm="percent",
        legendgroup="summary",
    ),
], rows=1, cols=1)
# One hidden line per run; merged 2WK event names render as text labels.
for origin, data in tension_data_prep.groupby('origin'):
    summary_tension_data_fig.add_trace(
        go.Scatter(
            x=data['Date'],
            y=data['Tension'],
            name=origin,
            line_color='rgba({}, 100, 100, 1)'.format(random.randint(150, 256)),
            legendgroup="details",
            legendgrouptitle_text="Data Source",
            visible='legendonly',
            text=data["Event"],
            textposition="bottom right",
            mode="lines+text",
        )
        , row=1, col=1
    )
summary_tension_data_fig.show("notebook")
# Plot event frequency
# Heatmap of event occurrence over time on a multicategory y-axis
# (category, event); cell intensity is the summed PPC (share of that event's
# lifetime occurrences) falling in each time bin.
event_fig = make_subplots(
    rows=1,
    cols=1,
    shared_xaxes=True,
    shared_yaxes=False,
    vertical_spacing=0.02,
    subplot_titles=["Summary"],
)
event_fig.update_layout(
    title_text="Event Distribution",
    autosize=True,
    width=1600,
    height=1600,
    showlegend=True,
    hovermode='x unified',
)
event_fig.update_yaxes(type='multicategory', tickmode="linear", showgrid=True, automargin=True)
event_fig.update_xaxes(tickangle=45, dtick='M6')
_event_heatmap = go.Histogram2d(
    x=event_data_prep["Date"],
    y=[event_data_prep["Category"].array, event_data_prep["Event"].array],
    z=event_data_prep["PPC"],
    nbinsx=round(len(event_data_prep["Date"]) / 3),
    colorscale=[[0, 'rgba(0,0,0,0)'], [0.2, 'rgba(200,10,10,0.7)'], [0.5, 'rgba(240,0,0,0.8)'], [0.8, 'rgba(255,0,0,1)'], [1, 'rgba(255,0,0,1)']],
    histfunc="sum",
    zmin=0,
    zmax=1,
)
event_fig.add_traces([_event_heatmap], rows=1, cols=1)
event_fig.show("notebook")
# Double check for uncategorized events — should print an empty array.
# NOTE(review): the literal notebook output line `array([], dtype=object)` had
# been pasted into the script here; it is not valid Python outside a notebook
# (NameError on `array`) and has been removed.
uncategorized_events = event_data_prep[event_data_prep["Category"] == "UNCATEGORIZED"]["Event"].unique()
print(uncategorized_events)  # expected: array([], dtype=object)